First we’ll load in the packages we need to tidy & analyze our simulation results.
library(dplyr) # data manipulation
library(Seurat) # scRNA-seq tools & data structures
library(ggplot2) # plots
library(targets) # pipeline tools
library(paletteer) # plot colors
library(patchwork) # plot layouts
library(reticulate) # python
Next we’ll load in the clustering algorithm outputs from our {targets} pipeline.
# Load the clustering-algorithm outputs from the {targets} pipeline:
# 2 references (pancreas, lung) x 4 cell counts x 3 cluster counts = 24 targets.
sim_target_grid <- expand.grid(ref = c("panc", "lung"),
                               ncell = c(1000L, 3000L, 5000L, 10000L),
                               nclust = c(3L, 5L, 7L),
                               stringsAsFactors = FALSE)
targets::tar_load_raw(sprintf("clustres_%s_ncell%d_nclust%d",
                              sim_target_grid$ref,
                              sim_target_grid$ncell,
                              sim_target_grid$nclust))
Next we’ll coerce everything into one big dataframe. We’re not going to include the Leiden clustering results this time as they’re nearly identical to the Louvain results.
# Collect every loaded clustering-result dataframe into a single table, tag
# each row with its reference tissue & source target name, drop the Leiden
# results (near-identical to Louvain), and normalize all runtimes to minutes.
clustres_objs <- grep("clustres", ls(), value = TRUE)  # hoisted: ls() was called twice
sim_results <- purrr::map(clustres_objs,
                          function(x) {
                            df <- eval(as.symbol(x))
                            # the target name encodes which reference was simulated
                            mutate(df,
                                   reference = if_else(stringr::str_detect(x, "panc"), "Pancreas", "Lung"),
                                   dataset = x)
                          }) %>%
  dplyr::bind_rows() %>%  # single-pass bind instead of O(n^2) purrr::reduce(rbind)
  filter(method != "Leiden (Seurat)") %>%
  mutate(method = factor(method,
                         levels = c("K-means (Hartigan-Wong)",
                                    "Hierarchical (Ward)",
                                    "Louvain (Seurat)",
                                    "DBSCAN",
                                    "GiniClust3",
                                    "SCISSORS"),
                         labels = c("K-means",
                                    "Hierarchical",
                                    "Louvain (Seurat)",
                                    "DBSCAN",
                                    "GiniClust3",
                                    "SCISSORS")),
         # runtime_units presumably comes from a difftime; convert to minutes
         runtime_minutes = case_when(runtime_units == "secs" ~ runtime / 60,
                                     runtime_units == "mins" ~ runtime,
                                     runtime_units == "hours" ~ runtime * 60,
                                     TRUE ~ NA_real_))
First we’ll look at the distribution of ARI values achieved by each method over the entire parameter space. We note that this includes parameter values that are less than optimal for some methods, but this is somewhat realistic as it can be very difficult to obtain parameter optimality without ground truth labels for your data. In practice sub-optimal parameters are often chosen due to reliance on software defaults, researcher inexperience, or stochasticity in methods / analysis.
# ARI distributions per method across the full parameter grid, faceted by
# reference tissue; Wilcoxon tests compare SCISSORS against each competitor.
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p0a <- ggplot(sim_results, aes(x = method, y = ari, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Adjusted Rand Index", fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line())
p0a
We can make the same plot for overall performance, without splitting by reference dataset.
# Same ARI comparison, pooled over both reference datasets
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p0b <- ggplot(sim_results, aes(x = method, y = ari, fill = method)) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Adjusted Rand Index", fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line())
p0b
We’ll repeat the plot for the Normalized Mutual Information.
# NMI distributions per method, faceted by reference tissue.
# Fixed axis-label typo: "Normalied" -> "Normalized".
p1a <- ggplot(sim_results, aes(x = method, y = nmi, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5,
              color = "black",
              size = 1) +
  ggsignif::geom_signif(comparisons = list(c("SCISSORS", "Louvain (Seurat)"),
                                           c("SCISSORS", "DBSCAN"),
                                           c("SCISSORS", "GiniClust3"),
                                           c("SCISSORS", "K-means"),
                                           c("SCISSORS", "Hierarchical")),
                        test = "wilcox.test",
                        step_increase = 0.08,
                        map_signif_level = TRUE,
                        vjust = 0.2,
                        textsize = 3) +
  labs(fill = "Clustering Method", y = "Normalized Mutual Information") +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  theme_classic(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line())
p1a
Again, the entire dataset without splitting by reference.
# Pooled NMI distributions (no facet by reference).
# Fixed axis-label typo: "Normalied" -> "Normalized".
p1b <- ggplot(sim_results, aes(x = method, y = nmi, fill = method)) +
  geom_violin(draw_quantiles = 0.5,
              color = "black",
              size = 1) +
  ggsignif::geom_signif(comparisons = list(c("SCISSORS", "Louvain (Seurat)"),
                                           c("SCISSORS", "DBSCAN"),
                                           c("SCISSORS", "GiniClust3"),
                                           c("SCISSORS", "K-means"),
                                           c("SCISSORS", "Hierarchical")),
                        test = "wilcox.test",
                        step_increase = 0.08,
                        map_signif_level = TRUE,
                        vjust = 0.2,
                        textsize = 3) +
  labs(fill = "Clustering Method", y = "Normalized Mutual Information") +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  theme_classic(base_size = 14) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        axis.title.x = element_blank(),
        panel.grid.major.y = element_line())
p1b
And lastly for the mean silhouette score.
# Mean silhouette score per method, faceted by reference tissue
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p2a <- ggplot(sim_results, aes(x = method, y = sil, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::number_format(accuracy = .1),
                     breaks = seq(round(min(sim_results$sil)), 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Mean Silhouette Score", fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line())
p2a
Without splitting by reference:
# Pooled mean silhouette score per method (no facet)
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p2b <- ggplot(sim_results, aes(x = method, y = sil, fill = method)) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::number_format(accuracy = .1),
                     breaks = seq(round(min(sim_results$sil)), 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Mean Silhouette Score", fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line())
p2b
Next we’ll subset the data to only include ARI values above the median for each method, which we are doing in order to mimic reasonable parameter selection by experienced researchers.
# Keep only the above-median ARI runs within each (reference, method) group to
# mimic informed parameter selection, then plot as before.
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p3 <- sim_results %>%
  with_groups(c(reference, method), filter, ari > median(ari)) %>%
  ggplot(aes(x = method, y = ari, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Adjusted Rand Index",
       fill = "Clustering Method",
       caption = "Top 50% of clustering runs") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line(),
        plot.caption = element_text(face = "italic"))
p3
We’ll repeat the filtered plot for the NMI of each method.
# Above-median NMI runs per (reference, method) group
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p4 <- sim_results %>%
  with_groups(c(reference, method), filter, nmi > median(nmi)) %>%
  ggplot(aes(x = method, y = nmi, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::percent_format(), breaks = seq(0, 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Normalized Mutual Information",
       fill = "Clustering Method",
       caption = "Top 50% of clustering runs") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line(),
        plot.caption = element_text(face = "italic"))
p4
As well as for the silhouette score.
# Above-median silhouette runs per (reference, method) group
signif_pairs <- list(c("SCISSORS", "Louvain (Seurat)"),
                     c("SCISSORS", "DBSCAN"),
                     c("SCISSORS", "GiniClust3"),
                     c("SCISSORS", "K-means"),
                     c("SCISSORS", "Hierarchical"))
p5 <- sim_results %>%
  with_groups(c(reference, method), filter, sil > median(sil)) %>%
  ggplot(aes(x = method, y = sil, fill = method)) +
  facet_wrap(~reference) +
  geom_violin(draw_quantiles = 0.5, color = "black", size = 1) +
  ggsignif::geom_signif(comparisons = signif_pairs,
                        test = "wilcox.test",
                        map_signif_level = TRUE,
                        step_increase = 0.08,
                        vjust = 0.2,
                        textsize = 3) +
  scale_y_continuous(labels = scales::number_format(accuracy = .1),
                     breaks = seq(round(min(sim_results$sil)), 1, by = 0.25)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "Silhouette Score",
       fill = "Clustering Method",
       caption = "Top 50% of clustering runs") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 0.9, vjust = 0.9),
        panel.grid.major.y = element_line(),
        plot.caption = element_text(face = "italic"))
p5
We can also plot an example of how the various methods categorize an example dataset. We’ll need to run each method individually, using the same code that we did in our {targets} pipeline. First we’ll load in a simulated dataset from the pancreas reference with 1,000 cells and 7 clusters.
# Example dataset: pancreas reference, 1,000 cells, 7 true clusters
targets::tar_load(sim_panc_ncell1000_nclust7)
Here’s what the ground truth labels from our simulations look like. Some of the clusters are easily separable, and some others are less so.
# 0-based copy of the true labels so they line up with estimated cluster IDs
sim_panc_ncell1000_nclust7@meta.data$cellPopulation2 <- as.integer(sim_panc_ncell1000_nclust7@meta.data$cellPopulation) - 1L
p6 <- DimPlot(sim_panc_ncell1000_nclust7, reduction = "tsne", group.by = "cellPopulation2") +
  scale_color_paletteer_d("ggsci::category10_d3") +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "True\nLabel") +
  theme_classic(base_size = 14) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_blank(),
        legend.title = element_text(face = "bold")) +
  guides(color = guide_legend(override.aes = list(size = 3)))
p6
Here we run the {SCISSORS} reclustering over all clusters, optimizing for maximum silhouette score. First, we estimate an initial broad clustering, which we can compare to the true labels below.
# Initial broad clustering at a low resolution (Louvain, algorithm = 1)
seu <- Seurat::FindClusters(sim_panc_ncell1000_nclust7,
                            resolution = 0.1,
                            algorithm = 1,
                            verbose = FALSE,
                            random.seed = 312)
Now we can recluster. We’ll use the default value of the mean silhouette cutoff score, 0.25. For other parameters, we use the same sets of possible values that we used in evaluating performance on all our simulated datasets. For the number of principal components, we’ll use 20 as there’s not too many cells in this dataset.
# Automatic {SCISSORS} reclustering over the candidate clusters, then merge the
# subcluster labels back into the original object.
SCISSORS_clusts <- SCISSORS::ReclusterCells(seurat.object = seu,
                                            auto = TRUE,
                                            use.parallel = FALSE,
                                            resolution.vals = seq(0.1, 0.7, by = 0.1),  # was by = c(0.1); `by` is a scalar
                                            k.vals = c(10, 25, 40, 60),
                                            merge.clusters = FALSE,
                                            use.sct = FALSE,
                                            n.HVG = 2000,
                                            n.PC = 20,
                                            cutoff.score = 0.25,
                                            random.seed = 312)
## [1] "Choosing reclustering candidates automatically."
## [1] "Reclustering cells in cluster 5 using k = 40 & resolution = 0.4; S = 0.296"
seu_new <- SCISSORS::IntegrateSubclusters(original.object = seu, reclust.results = SCISSORS_clusts)
# 0-based integer labels for comparison with the other methods
SCISSORS_clusts <- as.integer(seu_new$seurat_clusters) - 1L
Next we run Louvain clustering with a reasonable resolution value of 0.5. We’ll make sure to also use 20 PCs in the creation of the SNN graph here.
# Louvain clustering at resolution 0.5 on an SNN graph built from 20 PCs
# (annoy approximate nearest neighbors, cosine metric)
snn_obj <- Seurat::FindNeighbors(sim_panc_ncell1000_nclust7,
                                 reduction = "pca",
                                 dims = 1:20,
                                 nn.method = "annoy",
                                 annoy.metric = "cosine",
                                 verbose = FALSE)
Louvain_clusts <- Seurat::FindClusters(snn_obj,
                                       resolution = 0.5,
                                       algorithm = 1,
                                       random.seed = 312,
                                       verbose = FALSE)
Louvain_clusts <- as.integer(Louvain_clusts$seurat_clusters) - 1L
We’ll run Leiden as well, with the same resolution.
# NOTE(review): hard-coded, cluster-specific virtualenv path — not portable
reticulate::use_virtualenv("/nas/longleaf/home/jrleary/Python", required = TRUE)
# Leiden (algorithm = 4) at the same resolution used for Louvain above
leiden_res <- Seurat::FindClusters(sim_panc_ncell1000_nclust7,
                                   resolution = 0.5,
                                   algorithm = 4,
                                   random.seed = 312,
                                   verbose = FALSE)
Leiden_clusts <- as.integer(leiden_res$seurat_clusters) - 1L
For \(k\)-means, we’ll use the true value \(k = 7\), even though in practice we won’t know that parameter.
# K-means on the first 20 PCs, using the true k = 7
kmeans_fit <- kmeans(Embeddings(sim_panc_ncell1000_nclust7, "pca")[, 1:20],
                     centers = 7,
                     nstart = 5,
                     algorithm = "Hartigan-Wong")
kmeans_clusts <- as.integer(kmeans_fit$cluster) - 1L
We’ll do the same with hierarchical clustering.
# Ward-linkage hierarchical clustering on cosine distances over the first 20 PCs,
# cut at the true number of clusters (k = 7)
pc_embed <- Embeddings(sim_panc_ncell1000_nclust7, "pca")[, 1:20]
hclust_tree <- hclust(SCISSORS::CosineDist(pc_embed), method = "ward.D2")
hclust_clusts <- as.integer(cutree(hclust_tree, k = 7)) - 1L
Lastly, for DBSCAN we’ll first choose a reasonable value for the epsilon parameter by looking for the inflection point in the KNN distance plot. In our simulations we did this automatically by running several segmented regressions for changepoint detection and iterating over those values, but in this case we’ll choose one manually. It looks like \(\epsilon = 8\) is a reasonable value.
# The elbow of the 10-NN distance curve sits near 8, so use eps = 8
dbscan::kNNdistplot(Embeddings(sim_panc_ncell1000_nclust7, "pca")[, 1:20], k = 10)
abline(h = 8, col = "firebrick", lty = "dashed")
# scale(..., scale = FALSE) centers the PC embeddings without rescaling them
dbscan_clusts <- dbscan::dbscan(scale(sim_panc_ncell1000_nclust7@reductions$pca@cell.embeddings[, 1:20], scale = FALSE),
                                eps = 8,
                                minPts = 5,
                                borderPoints = TRUE)$cluster
Lastly we’ll run GiniClust3 using the default parameters. We’ll need to switch to Python to use this method, but first we’ll need to make sure we can read the counts matrix from R to Python.
# Dense gene x cell counts matrix pulled from the Seurat object, so that
# reticulate can expose it to Python as `r.count_mat` in the next chunk.
count_mat <- as.matrix(sim_panc_ncell1000_nclust7@assays$RNA@counts)Now we can run the algorithm. We’ll use the default values for parameters of interest as found in the {GiniClust3} docs.
# --- GiniClust3 consensus clustering (executed via reticulate) ---
# import libraries
import anndata # annotated single cell data
import numpy as np # matrix algebra
import pandas as pd # DataFrames
import scanpy as sc # ScanPy
import giniclust3 as gc # GiniClust3
# create AnnData object
# `r.count_mat` is the gene x cell matrix exported from R; AnnData expects
# cells x genes, hence the transpose.
adata = anndata.AnnData(X=r.count_mat.T)
# library-size normalization to 10,000 counts per cell
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
# calculate clustering
# Gini-index gene selection for the rare-cell clustering (p < 0.001)
gc.gini.calGini(adata, selection='p_value', p_value=0.001)## Gene number is 15270
## Cell number is 996
# Fano-factor gene selection for the common-cell clustering
gc.fano.calFano(adata, method='scanpy')
# NOTE(review): the return values below are never used — the consensus step
# reads the 'rare' / 'fano' labels stored on adata.obs instead
adataGini = gc.gini.clusterGini(adata, resolution=0.1, neighbors=5)
adataFano = gc.fano.clusterFano(adata, resolution=0.1, neighbors=15)
# combine the rare (Gini) & common (Fano) clusterings into a consensus labeling;
# clusterMtilde() populates consensusCluster['finalCluster'], read back in R below
consensusCluster = {}
consensusCluster['giniCluster'] = np.array(adata.obs['rare'].values.tolist())
consensusCluster['fanoCluster'] = np.array(adata.obs['fano'].values.tolist())
gc.consensus.generateMtilde(consensusCluster)
gc.consensus.clusterMtilde(consensusCluster)We now bring the clustering results back into R.
# Pull the GiniClust3 consensus labels back from the Python session
giniclust3_clusts <- as.integer(py$consensusCluster$finalCluster)
Now let’s bring all our values together & plot them.
# One long dataframe of (method, cluster, t-SNE coords); Leiden is dropped as
# it is nearly identical to Louvain
tsne_embed <- Embeddings(sim_panc_ncell1000_nclust7, "tsne")
clust_comp <- data.frame(SCISSORS = SCISSORS_clusts,
                         Louvain = Louvain_clusts,
                         Leiden = Leiden_clusts,
                         Kmeans = kmeans_clusts,
                         Hierarchical = hclust_clusts,
                         DBSCAN = dbscan_clusts,
                         GiniClust3 = giniclust3_clusts,
                         tSNE_1 = tsne_embed[, 1],
                         tSNE_2 = tsne_embed[, 2]) %>%
  tidyr::pivot_longer(!contains("tSNE"), names_to = "method", values_to = "cluster") %>%
  mutate(across(!contains("tSNE"), as.factor)) %>%
  filter(method != "Leiden")
p7 <- ggplot(clust_comp, aes(x = tSNE_1, y = tSNE_2, color = cluster)) +
  facet_wrap(~method) +
  geom_point(size = 0.75) +
  scale_color_paletteer_d("ggsci::category10_d3") +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "Estimated\nCluster") +
  theme_classic(base_size = 14) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_blank(),
        legend.title = element_text(face = "bold")) +
  guides(color = guide_legend(override.aes = list(size = 3)))
p7
We’ll also put together sub-tables of the ARI values & mean silhouette scores for each method.
# ARI & mean silhouette for each method on the example dataset. The cosine
# distance over the first 20 PCs is identical in every silhouette call, so it
# is computed once instead of six times.
pc_cos_dist <- SCISSORS::CosineDist(Embeddings(sim_panc_ncell1000_nclust7, "pca")[, 1:20])
true_labs <- sim_panc_ncell1000_nclust7$cellPopulation
clust_list <- list(SCISSORS = SCISSORS_clusts,
                   Louvain = Louvain_clusts,
                   `K-means` = kmeans_clusts,
                   Hierarchical = hclust_clusts,
                   DBSCAN = dbscan_clusts,
                   GiniClust3 = giniclust3_clusts)
ari_res <- data.frame(ARI = unname(purrr::map_dbl(clust_list, \(cl) mclust::adjustedRandIndex(true_labs, cl))),
                      Method = names(clust_list)) %>%
  arrange(desc(ARI)) %>%
  mutate(ARI_Rank = row_number()) %>%
  select(Method, ARI, ARI_Rank)
sil_res <- data.frame(Sil = unname(purrr::map_dbl(clust_list, \(cl) mean(cluster::silhouette(pc_cos_dist, x = cl)[, 3]))),
                      Method = names(clust_list)) %>%
  arrange(desc(Sil)) %>%
  mutate(Sil_Rank = row_number()) %>%
  select(Method, Sil, Sil_Rank)
ari_table <- gridExtra::tableGrob(ari_res,
                                  cols = c("Method", "Adj. Rand Index", "Rank"),
                                  theme = gridExtra::ttheme_minimal(colhead = list(bg_params = list(fill = "grey90"))),
                                  rows = NULL)
sil_table <- gridExtra::tableGrob(sil_res,
                                  cols = c("Method", "Mean Silhouette Score", "Rank"),
                                  theme = gridExtra::ttheme_minimal(colhead = list(bg_params = list(fill = "grey90"))),
                                  rows = NULL)
# combined table: overall rank = sum of the two per-metric ranks
metric_res <- ari_res %>%
  inner_join(sil_res, by = "Method") %>%
  mutate(Combined_Rank = ARI_Rank + Sil_Rank) %>%
  arrange(Combined_Rank) %>%
  mutate(Rank = row_number()) %>%
  select(Method,
         ARI,
         ARI_Rank,
         Sil,
         Sil_Rank,
         Rank)
metric_table <- gridExtra::tableGrob(metric_res,
                                     cols = c("Method", "ARI", "ARI Rank", "Silhouette", "Silhouette Rank", "Overall Rank"),
                                     theme = gridExtra::ttheme_minimal(colhead = list(bg_params = list(fill = "grey90"))),
                                     rows = NULL)
Looking at the results, we see that {SCISSORS} almost exactly recaptures the original clustering, and also provides the best measure of cluster fit (silhouette score). Even methods that know the true number of clusters a priori, \(k\)-means and hierarchical clustering, ignore the two small subclusters in favor of splitting up larger clusters.
# Compose: estimated-cluster panel on top; ground-truth t-SNE + metric table below
p8a <- (p7 / ((p6 + facet_wrap(~"Ground Truth")) | metric_table)) +
  plot_layout(heights = c(2, 1))
p8a
Lastly, we’ll show that simply naively increasing the resolution parameter of the Louvain clustering does not lead to better results.
# Sweep the Louvain resolution parameter. The cosine-distance matrix over the
# first 20 PCs is loop-invariant, so it is computed once up front.
res_values <- c(.5, .7, .9, 1.1, 1.5, 2)
pc_dist <- SCISSORS::CosineDist(Embeddings(sim_panc_ncell1000_nclust7, "pca")[, 1:20])
Louvain_res_incr <- purrr::map(res_values, function(x) {
  cluster_res <- Seurat::FindClusters(sim_panc_ncell1000_nclust7,
                                      resolution = x,
                                      algorithm = 1,
                                      random.seed = 312,
                                      verbose = FALSE)
  cluster_vals <- as.integer(cluster_res$seurat_clusters) - 1L
  ari_val <- mclust::adjustedRandIndex(cluster_res$seurat_clusters, sim_panc_ncell1000_nclust7$cellPopulation)
  # reuse cluster_vals rather than recomputing the 0-based labels
  sil_val <- mean(cluster::silhouette(pc_dist, x = cluster_vals)[, 3])
  return(list(Clustering = cluster_vals,
              ARI = ari_val,
              Silhouette = sil_val))
})
# long dataframe of (resolution, cluster, t-SNE coords) for plotting
res_df <- data.frame(resolution = rep(res_values, each = ncol(sim_panc_ncell1000_nclust7)),
                     clust = unlist(purrr::map(Louvain_res_incr, \(x) x$Clustering)),
                     tSNE_1 = rep(Embeddings(sim_panc_ncell1000_nclust7, "tsne")[, 1], 6),
                     tSNE_2 = rep(Embeddings(sim_panc_ncell1000_nclust7, "tsne")[, 2], 6)) %>%
  mutate(resolution = factor(resolution, labels = c("0.5", "0.7", "0.9", "1.1", "1.5", "2.0")),
         clust = as.factor(clust))
louvain_metric_res <- data.frame(Res = factor(res_values,
                                              labels = c("0.5", "0.7", "0.9", "1.1", "1.5", "2.0")),
                                 ARI = purrr::map_dbl(Louvain_res_incr, \(x) x$ARI),
                                 # BUG FIX: the Sil column previously re-used x$ARI
                                 Sil = purrr::map_dbl(Louvain_res_incr, \(x) x$Silhouette)) %>%
  arrange(desc(ARI))
louvain_ari_table <- gridExtra::tableGrob(louvain_metric_res,
                                          cols = c("Resolution", "Adj. Rand Index", "Silhouette"),
                                          theme = gridExtra::ttheme_minimal(colhead = list(bg_params = list(fill = "grey90"))),
                                          rows = NULL)
We see that the Louvain algorithm tends to split off larger clusters first before identifying the true split between the two smaller clusters (true clusters 5 & 6).
# t-SNE of the Louvain labels at each resolution value
p8b <- ggplot(res_df, aes(x = tSNE_1, y = tSNE_2, color = clust)) +
  facet_wrap(~paste0("Resolution: ", resolution)) +
  geom_point(size = 0.75) +
  scale_color_paletteer_d("ggsci::category10_d3") +
  labs(x = "t-SNE 1", y = "t-SNE 2", color = "Louvain\nCluster") +
  theme_classic(base_size = 14) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        plot.title = element_blank(),
        legend.title = element_text(face = "bold")) +
  guides(color = guide_legend(override.aes = list(size = 3)))
(p8b / (p6 | louvain_ari_table)) + plot_layout(heights = c(2, 1))
Next, let’s take a look at how the runtimes of the various methods compare. {SCISSORS} has the highest runtime, as expected because it performs multiple runs of the Louvain algorithm, as opposed to most of the simpler methods which are single-pass.
# Log-scale runtime distributions, one free-x panel per method
p9a <- sim_results %>%
  mutate(dataset_size = stringr::str_extract(dataset, "ncell.*_"),
         dataset_size = stringr::str_replace(stringr::str_replace(dataset_size, "ncell", ""), "_", ""),
         dataset_size = as.integer(dataset_size),
         # NOTE(review): dataset_size is never mapped in this plot; kept for
         # parity with the raw-runtime figure below
         dataset_size = factor(dataset_size, levels = c("1000", "3000", "5000", "10000"))) %>%
  ggplot(aes(x = method, y = runtime_minutes, fill = method)) +
  facet_wrap(~method, scales = "free_x") +
  geom_violin(draw_quantiles = 0.5, scale = "width", color = "black", size = 1) +
  scale_y_continuous(trans = "log", labels = scales::number_format(accuracy = .1)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(y = "log(Runtime)", fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid.major.y = element_line())
p9a
We’ll also plot un-transformed runtime, though the plot is harder to read because of the disparity in runtimes between simpler and more complex methods. It’s expected that {SCISSORS} have longer runtimes as it’s a multi-pass algorithm, but even for larger datasets it rarely exceeds 10 minutes of runtime thanks to automatic identification of reclustering targets.
# Raw runtime (minutes) by number of cells, one free-scale panel per method
p9b <- sim_results %>%
  mutate(dataset_size = stringr::str_extract(dataset, "ncell.*_"),
         dataset_size = stringr::str_replace(stringr::str_replace(dataset_size, "ncell", ""), "_", ""),
         dataset_size = as.integer(dataset_size),
         dataset_size = factor(dataset_size,
                               levels = c(1000L, 3000L, 5000L, 10000L),
                               labels = c("1,000", "3,000", "5,000", "10,000"))) %>%
  ggplot(aes(x = dataset_size, y = runtime_minutes, fill = method, group = dataset_size)) +
  facet_wrap(~method, scales = "free") +
  geom_violin(draw_quantiles = 0.5,
              position = position_dodge(1),
              scale = "width",
              color = "black",
              size = 1) +
  scale_y_continuous(labels = scales::number_format(accuracy = .01)) +
  scale_fill_paletteer_d("ggsci::nrc_npg") +
  labs(x = "Number of Cells",
       y = "Runtime (minutes)",
       fill = "Clustering Method") +
  theme_classic(base_size = 14) +
  theme(axis.ticks.x = element_blank(),
        panel.grid.major.y = element_line())
p9b
First we’ll define a convenience function to help save our plots.
#' Save a plot as a retina-quality PDF under ./figures.
#'
#' @param plot.obj The ggplot (or patchwork) object to save.
#' @param plot.name File name for the PDF; must be non-empty.
#' @param dims Width & height in inches, as a length-2 numeric vector.
#' @return The (invisible) result of ggplot2::ggsave().
fig_save <- function(plot.obj, plot.name = "", dims = c(9, 5)) {
  # fail fast on a missing file name or malformed dimensions
  stopifnot(nzchar(plot.name), length(dims) == 2)
  # ggsave() errors if the output directory doesn't exist yet
  if (!dir.exists("./figures")) {
    dir.create("./figures", recursive = TRUE)
  }
  ggplot2::ggsave(filename = plot.name,
                  path = "./figures",
                  plot = plot.obj,
                  device = "pdf",
                  width = dims[1],
                  height = dims[2],
                  units = "in",
                  dpi = "retina")
}
# Write every figure to ./figures as a PDF
fig_save(p0a, plot.name = "Total_ARI_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p0b, plot.name = "Total_ARI_By_Method.pdf", dims = c(10, 6))
fig_save(p1a, plot.name = "Total_NMI_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p1b, plot.name = "Total_NMI_By_Method.pdf", dims = c(10, 6))
fig_save(p2a, plot.name = "Total_Silhouette_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p2b, plot.name = "Total_Silhouette_By_Method.pdf", dims = c(10, 6))
fig_save(p3, plot.name = "Above_Median_ARI_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p4, plot.name = "Above_Median_NMI_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p5, plot.name = "Above_Median_Silhouette_By_Method_And_Reference.pdf", dims = c(10, 6))
fig_save(p6, plot.name = "Case_Study_True_Label_tSNE.pdf", dims = c(8, 4.5))
fig_save(p7, plot.name = "Case_Study_Estimated_Clusters_All_tSNE.pdf", dims = c(12, 7))
fig_save(p8a, plot.name = "Case_Study_Estimated_Clusters_Performance.pdf", dims = c(15, 8))
fig_save(p8b, plot.name = "Case_Study_Louvain_Resolution_Performance.pdf", dims = c(15, 8))
# fixed "Runtime__Log" double-underscore typo in the file name below
fig_save(p9a, plot.name = "Runtime_Log_By_Method_And_Reference.pdf", dims = c(12, 7))
fig_save(p9b, plot.name = "Runtime_Raw_By_Method_And_Number_Cells.pdf", dims = c(12, 7))
sessioninfo::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.1.3 (2022-03-10)
## os Red Hat Enterprise Linux 8.5 (Ootpa)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2022-11-13
## pandoc 2.11.4 @ /nas/longleaf/apps/rstudio-server/1.4.1717/bin/pandoc/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.1.3)
## AnnotationDbi 1.56.2 2021-11-09 [2] Bioconductor
## aricode * 1.0.1 2022-09-05 [1] CRAN (R 4.1.3)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.1.3)
## backports 1.4.1 2021-12-13 [1] CRAN (R 4.1.3)
## base64url 1.4 2018-05-14 [1] CRAN (R 4.1.3)
## Biobase * 2.54.0 2021-10-26 [2] Bioconductor
## BiocFileCache 2.2.1 2022-01-23 [2] Bioconductor
## BiocGenerics * 0.40.0 2021-10-26 [2] Bioconductor
## biomaRt 2.50.3 2022-02-03 [2] Bioconductor
## Biostrings 2.62.0 2021-10-26 [2] Bioconductor
## bit 4.0.4 2020-08-04 [2] CRAN (R 4.1.3)
## bit64 4.0.5 2020-08-30 [2] CRAN (R 4.1.3)
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.1.3)
## blob 1.2.3 2022-04-10 [1] CRAN (R 4.1.3)
## broom 1.0.0 2022-07-01 [1] CRAN (R 4.1.3)
## bslib 0.4.0 2022-07-16 [1] CRAN (R 4.1.3)
## cachem 1.0.6 2021-08-19 [1] CRAN (R 4.1.0)
## callr 3.7.2 2022-08-22 [2] CRAN (R 4.1.3)
## cellranger 1.1.0 2016-07-27 [2] CRAN (R 4.1.3)
## cli 3.3.0 2022-04-25 [1] CRAN (R 4.1.3)
## cluster * 2.1.3 2022-03-28 [1] CRAN (R 4.1.0)
## codetools 0.2-18 2020-11-04 [2] CRAN (R 4.1.3)
## colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.1.0)
## cowplot 1.1.1 2020-12-30 [2] CRAN (R 4.1.3)
## crayon 1.5.1 2022-03-26 [1] CRAN (R 4.1.3)
## curl 4.3.2 2021-06-23 [2] CRAN (R 4.1.3)
## data.table 1.14.2 2021-09-27 [1] CRAN (R 4.1.0)
## DBI 1.1.3 2022-06-18 [1] CRAN (R 4.1.0)
## dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.1.3)
## dbscan * 1.1-11 2022-10-27 [1] CRAN (R 4.1.3)
## DelayedArray 0.20.0 2021-10-26 [2] Bioconductor
## deldir 1.0-6 2021-10-23 [1] CRAN (R 4.1.3)
## digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.0)
## doParallel 1.0.17 2022-02-07 [1] CRAN (R 4.1.3)
## dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.1.3)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.1.3)
## evaluate 0.15 2022-02-18 [1] CRAN (R 4.1.0)
## fansi 1.0.3 2022-03-24 [1] CRAN (R 4.1.0)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.1.3)
## fastmap 1.1.0 2021-01-25 [2] CRAN (R 4.1.3)
## filelock 1.0.2 2018-10-05 [2] CRAN (R 4.1.3)
## fitdistrplus 1.1-8 2022-03-10 [1] CRAN (R 4.1.3)
## forcats * 0.5.2 2022-08-19 [2] CRAN (R 4.1.3)
## foreach 1.5.2 2022-02-02 [1] CRAN (R 4.1.3)
## fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.0)
## future * 1.27.0 2022-07-22 [1] CRAN (R 4.1.3)
## future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.1.3)
## future.callr * 0.8.0 2022-04-01 [1] CRAN (R 4.1.3)
## gargle 1.2.1 2022-09-08 [2] CRAN (R 4.1.3)
## generics 0.1.3 2022-07-05 [1] CRAN (R 4.1.3)
## GenomeInfoDb * 1.28.4 2021-09-05 [1] Bioconductor
## GenomeInfoDbData 1.2.7 2022-03-31 [2] Bioconductor
## GenomicRanges * 1.46.1 2021-11-18 [2] Bioconductor
## ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.1.3)
## ggrepel 0.9.1 2021-01-15 [1] CRAN (R 4.1.0)
## ggridges 0.5.3 2021-01-08 [2] CRAN (R 4.1.3)
## ggsignif 0.6.3 2021-09-09 [1] CRAN (R 4.1.0)
## globals 0.15.1 2022-06-24 [1] CRAN (R 4.1.3)
## glue 1.6.0 2021-12-17 [1] CRAN (R 4.1.0)
## goftest 1.2-3 2021-10-07 [1] CRAN (R 4.1.0)
## googledrive 2.0.0 2021-07-08 [2] CRAN (R 4.1.3)
## googlesheets4 1.0.1 2022-08-13 [2] CRAN (R 4.1.3)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 4.1.3)
## gtable 0.3.1 2022-09-01 [2] CRAN (R 4.1.3)
## haven 2.5.0 2022-04-15 [1] CRAN (R 4.1.3)
## here 1.0.1 2020-12-13 [1] CRAN (R 4.1.0)
## highr 0.9 2021-04-16 [2] CRAN (R 4.1.3)
## hms 1.1.1 2021-09-26 [1] CRAN (R 4.1.0)
## htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.1.3)
## htmlwidgets 1.5.4 2021-09-08 [1] CRAN (R 4.1.0)
## httpuv 1.6.5 2022-01-05 [1] CRAN (R 4.1.3)
## httr 1.4.3 2022-05-04 [1] CRAN (R 4.1.3)
## ica 1.0-3 2022-07-08 [2] CRAN (R 4.1.3)
## igraph 1.3.4 2022-07-19 [1] CRAN (R 4.1.3)
## iotools 0.3-2 2021-07-23 [1] CRAN (R 4.1.3)
## IRanges * 2.28.0 2021-10-26 [2] Bioconductor
## irlba 2.3.5 2021-12-06 [1] CRAN (R 4.1.3)
## iterators 1.0.14 2022-02-05 [1] CRAN (R 4.1.0)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.1.3)
## jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.1.0)
## KEGGREST 1.34.0 2021-10-26 [2] Bioconductor
## KernSmooth 2.23-20 2021-05-03 [2] CRAN (R 4.1.3)
## knitr 1.39 2022-04-26 [1] CRAN (R 4.1.3)
## labeling 0.4.2 2020-10-20 [2] CRAN (R 4.1.3)
## later 1.3.0 2021-08-18 [1] CRAN (R 4.1.0)
## lattice 0.20-45 2021-09-22 [2] CRAN (R 4.1.3)
## lazyeval 0.2.2 2019-03-15 [2] CRAN (R 4.1.3)
## leiden 0.4.2 2022-05-09 [1] CRAN (R 4.1.3)
## lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.1.0)
## listenv 0.8.0 2019-12-05 [2] CRAN (R 4.1.3)
## lmtest 0.9-40 2022-03-21 [1] CRAN (R 4.1.3)
## logging 0.10-108 2019-07-14 [1] CRAN (R 4.1.3)
## lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.1.0)
## magrittr * 2.0.3 2022-03-30 [1] CRAN (R 4.1.0)
## MASS * 7.3-58 2022-07-14 [1] CRAN (R 4.1.3)
## Matrix 1.5-1 2022-09-13 [1] CRAN (R 4.1.3)
## MatrixGenerics * 1.4.3 2021-08-26 [1] Bioconductor
## matrixStats * 0.62.0 2022-04-19 [1] CRAN (R 4.1.3)
## mclust * 5.4.10 2022-05-20 [1] CRAN (R 4.1.0)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.1.3)
## mgcv 1.8-40 2022-03-29 [1] CRAN (R 4.1.0)
## mime 0.12 2021-09-28 [1] CRAN (R 4.1.0)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.1.3)
## modelr 0.1.9 2022-08-19 [2] CRAN (R 4.1.3)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.1.3)
## nlme * 3.1-158 2022-06-15 [1] CRAN (R 4.1.0)
## paletteer * 1.5.0 2022-10-19 [1] CRAN (R 4.1.3)
## parallelly 1.32.1 2022-07-21 [1] CRAN (R 4.1.3)
## patchwork * 1.1.1 2020-12-17 [1] CRAN (R 4.1.0)
## pbapply 1.5-0 2021-09-16 [1] CRAN (R 4.1.0)
## phateR 1.0.7 2021-02-12 [1] CRAN (R 4.1.0)
## pillar 1.8.0 2022-07-18 [1] CRAN (R 4.1.3)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.1.3)
## plotly 4.10.0 2021-10-09 [1] CRAN (R 4.1.0)
## plyr 1.8.7 2022-03-24 [1] CRAN (R 4.1.0)
## png 0.1-7 2013-12-03 [2] CRAN (R 4.1.3)
## polyclip 1.10-0 2019-03-14 [2] CRAN (R 4.1.3)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.1.3)
## prismatic 1.1.0 2021-10-17 [1] CRAN (R 4.1.0)
## processx 3.7.0 2022-07-07 [1] CRAN (R 4.1.3)
## progress 1.2.2 2019-05-16 [2] CRAN (R 4.1.3)
## progressr 0.10.1 2022-06-03 [1] CRAN (R 4.1.0)
## promises 1.2.0.1 2021-02-11 [2] CRAN (R 4.1.3)
## ps 1.7.1 2022-06-18 [1] CRAN (R 4.1.0)
## purrr * 0.3.4 2020-04-17 [2] CRAN (R 4.1.3)
## R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.0)
## ragg 1.2.2 2022-02-21 [1] CRAN (R 4.1.0)
## RANN 2.6.1 2019-01-08 [2] CRAN (R 4.1.3)
## rappdirs 0.3.3 2021-01-31 [2] CRAN (R 4.1.3)
## RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.1.3)
## Rcpp 1.0.9 2022-07-08 [1] CRAN (R 4.1.3)
## RcppAnnoy 0.0.19 2021-07-30 [1] CRAN (R 4.1.0)
## RcppZiggurat 0.1.6 2020-10-20 [1] CRAN (R 4.1.3)
## RCurl 1.98-1.7 2022-06-09 [1] CRAN (R 4.1.0)
## readr * 2.1.2 2022-01-30 [1] CRAN (R 4.1.0)
## readxl 1.4.0 2022-03-28 [1] CRAN (R 4.1.0)
## rematch2 2.1.2 2020-05-01 [2] CRAN (R 4.1.3)
## reprex 2.0.2 2022-08-17 [2] CRAN (R 4.1.3)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.1.3)
## reticulate * 1.25 2022-05-11 [1] CRAN (R 4.1.3)
## Rfast 2.0.6 2022-02-16 [1] CRAN (R 4.1.3)
## rgdal 1.5-32 2022-05-09 [1] CRAN (R 4.1.3)
## rgeos 0.5-9 2021-12-15 [2] CRAN (R 4.1.3)
## rlang 1.0.4 2022-07-12 [1] CRAN (R 4.1.3)
## rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.1.3)
## ROCR 1.0-11 2020-05-02 [2] CRAN (R 4.1.3)
## rpart 4.1.16 2022-01-24 [2] CRAN (R 4.1.3)
## rprojroot 2.0.3 2022-04-02 [1] CRAN (R 4.1.3)
## RSQLite 2.2.15 2022-07-17 [1] CRAN (R 4.1.3)
## rstudioapi 0.14 2022-08-22 [2] CRAN (R 4.1.3)
## Rtsne 0.16 2022-04-17 [1] CRAN (R 4.1.3)
## rvest 1.0.2 2021-10-16 [1] CRAN (R 4.1.0)
## S4Vectors * 0.34.0 2022-04-26 [1] Bioconductor
## sass 0.4.2 2022-07-16 [1] CRAN (R 4.1.3)
## scaffold * 0.2.0 2022-10-28 [1] Github (rhondabacher/scaffold@714c319)
## scales 1.2.0 2022-04-13 [1] CRAN (R 4.1.3)
## scattermore 0.8 2022-02-14 [1] CRAN (R 4.1.0)
## SCISSORS * 1.2.0 2022-11-12 [1] Github (jr-leary7/SCISSORS@3459cf5)
## sctransform 0.3.4 2022-08-20 [2] CRAN (R 4.1.3)
## segmented * 1.6-0 2022-05-31 [1] CRAN (R 4.1.0)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.1.3)
## Seurat * 4.1.1 2022-05-02 [1] CRAN (R 4.1.3)
## SeuratObject * 4.1.0 2022-05-01 [1] CRAN (R 4.1.3)
## shiny 1.7.2 2022-07-19 [1] CRAN (R 4.1.3)
## SingleCellExperiment * 1.16.0 2021-10-26 [2] Bioconductor
## sp * 1.5-0 2022-06-05 [1] CRAN (R 4.1.0)
## spatstat.core 2.4-4 2022-05-18 [1] CRAN (R 4.1.0)
## spatstat.data 3.0-0 2022-10-21 [1] CRAN (R 4.1.3)
## spatstat.geom 3.0-3 2022-10-25 [1] CRAN (R 4.1.3)
## spatstat.random 3.0-1 2022-11-03 [1] CRAN (R 4.1.3)
## spatstat.sparse 3.0-0 2022-10-21 [1] CRAN (R 4.1.3)
## spatstat.utils 3.0-1 2022-10-19 [1] CRAN (R 4.1.3)
## stringi 1.7.8 2022-07-11 [1] CRAN (R 4.1.3)
## stringr * 1.4.1 2022-08-20 [2] CRAN (R 4.1.3)
## SummarizedExperiment * 1.24.0 2021-10-26 [2] Bioconductor
## survival 3.3-1 2022-03-03 [1] CRAN (R 4.1.0)
## systemfonts 1.0.4 2022-02-11 [1] CRAN (R 4.1.0)
## tarchetypes * 0.7.1 2022-09-07 [1] CRAN (R 4.1.3)
## targets * 0.13.5 2022-09-26 [1] CRAN (R 4.1.3)
## tensor 1.5 2012-05-05 [2] CRAN (R 4.1.3)
## textshaping 0.3.6 2021-10-13 [2] CRAN (R 4.1.3)
## tibble * 3.1.8 2022-07-22 [1] CRAN (R 4.1.3)
## tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.1.3)
## tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.1.0)
## tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.1.3)
## tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.1.0)
## utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.0)
## uwot 0.1.11 2021-12-02 [1] CRAN (R 4.1.0)
## vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.1.3)
## viridisLite 0.4.1 2022-08-22 [2] CRAN (R 4.1.3)
## withr 2.5.0 2022-03-03 [1] CRAN (R 4.1.3)
## wrswoR 1.1.1 2020-07-26 [1] CRAN (R 4.1.3)
## xfun 0.31 2022-05-10 [1] CRAN (R 4.1.3)
## XML 3.99-0.10 2022-06-09 [1] CRAN (R 4.1.0)
## xml2 1.3.3 2021-11-30 [2] CRAN (R 4.1.3)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.1.3)
## XVector 0.34.0 2021-10-26 [2] Bioconductor
## yaml 2.3.5 2022-02-21 [1] CRAN (R 4.1.0)
## zlibbioc 1.40.0 2021-10-26 [2] Bioconductor
## zoo 1.8-10 2022-04-15 [1] CRAN (R 4.1.3)
##
## [1] /nas/longleaf/home/jrleary/r_packages_default
## [2] /nas/longleaf/rhel8/apps/r/4.1.3/lib64/R/library
##
## ─ Python configuration ───────────────────────────────────────────────────────
## python: /nas/longleaf/home/jrleary/Python/bin/python
## libpython: /usr/lib64/libpython3.6m.so
## pythonhome: /nas/longleaf/home/jrleary/Python:/nas/longleaf/home/jrleary/Python
## version: 3.6.8 (default, Sep 9 2021, 07:49:02) [GCC 8.5.0 20210514 (Red Hat 8.5.0-3)]
## numpy: /nas/longleaf/home/jrleary/Python/lib/python3.6/site-packages/numpy
## numpy_version: 1.19.5
##
## NOTE: Python version was forced by use_python function
##
## ──────────────────────────────────────────────────────────────────────────────